# display plots inline
%matplotlib notebook
# imports
import os
import numpy as np
import pandas as pd
import pymc3 as pm
from bambi import Model, Prior
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats
import pymc3_utils as pmu
# suppress system warnings for legibility
import warnings
# NOTE(review): this silences *all* warnings, including pandas/pymc3
# deprecation notices — confirm that is intended for the published notebook
warnings.filterwarnings('ignore')
# resize plots to fit labels inside bounding box
from matplotlib import rcParams
rcParams.update({'figure.autolayout': True})
# MPI color scheme
sns.set(style='white', palette='Set2')
# Load the Tamil reading-task scores plus the previously computed digit-span
# and Raven's intercepts, then join everything on the participant id ('pp').
df_reading = pd.read_csv('data/tamil_reading.tsv', sep='\t')
span_columns = ['pp', 'raw_span_mean', 'span_intercept']
ravens_columns = ['pp', 'raw_ravens_mean', 'ravens_intercept']
df_span = pd.read_csv('data/span_intercepts.tsv', sep='\t')[span_columns]
df_ravens = pd.read_csv('data/ravens_intercepts.tsv', sep='\t')[ravens_columns]
# inner join (pandas default) on the shared participant column;
# ravens first, then span, so the column order matches the analyses below
for df_extra in (df_ravens, df_span):
    df_reading = df_reading.merge(df_extra, on='pp')
display(df_reading.head().round(2))
We suspect reading scores are highly correlated with our cognitive ability measures (Raven's and digit span). To get a better idea of these relationships, we will draw a heatmap of absolute (rectified) correlations.
# Heatmap of absolute correlations between the candidate predictors.
predictor_columns = [
    'word', 'pseudoword',
    'raw_span_mean', 'span_intercept',
    'raw_ravens_mean', 'ravens_intercept']
corrs = df_reading[predictor_columns].corr().round(2)
display(corrs)
# put NaN on the (trivial) diagonal so seaborn leaves those cells blank
diag = np.where(np.eye(len(corrs), dtype=bool), np.nan, 0.0)
corrs = (corrs + diag).abs()
g = sns.heatmap(corrs, cmap='magma', vmin=0, vmax=1, annot=True)
g.set_xticklabels(g.get_xticklabels(), rotation=45, ha='right')
g.set_yticklabels(g.get_yticklabels(), rotation=0)
# explicit y-limits work around heatmap rows being clipped at the edges
g.set(ylim=(len(corrs), 0))
g.set_title('absolute correlations between predictors', pad=12)
plt.savefig('figures/precorrection_heatmap.pdf')
plt.savefig('figures/precorrection_heatmap.png', dpi=600, bbox_inches='tight')
If we want to use reading score as a predictor of literacy, independent of cognitive ability (and other relevant factors that were likely unintentionally captured in the cognitive ability scores, such as familiarity with formal test-taking), we will need to correct the reading scores. Our correction procedure consists of regressing out common variance with the cognitive ability measures.
# Recode the terse literacy codes into readable group labels.
literacy_labels = {'y': 'literate', 'low': 'low-literate', 'n': 'illiterate'}
df_reading['literate'] = df_reading['literate'].replace(literacy_labels)
# Reshape wide -> long: one row per participant x task, with the reading
# score in its own column.
id_columns = [
    'subject', 'pp', 'literate',
    'raw_ravens_mean', 'ravens_intercept',
    'raw_span_mean', 'span_intercept']
df_reading = df_reading.melt(
    id_vars=id_columns,
    value_vars=['word', 'pseudoword'],
    var_name='task', value_name='score')
display(df_reading.head().round(2))
To confirm that the reading scores are indeed associated with literacy in a meaningful way, we are plotting the distribution of word and pseudoword reading scores in each self-reported literacy group (literate, low-literate, and illiterate).
# plot reading score distributions
# violin plot per task, split by self-reported literacy group; violin area is
# scaled by group size ('count'), densities are cut at the observed data
# range, and quartile lines are drawn inside each violin
g = sns.catplot(x='task', y='score', hue='literate',
hue_order=['illiterate', 'low-literate', 'literate'],
scale='count', cut=0, inner='quartile',
kind='violin', data=df_reading, legend=False)
# scores are counts out of 100 words, hence the fixed 0-100 axis
g.set(ylim=(0, 100), xlabel='', ylabel='number of words read correctly')
g.ax.legend(loc='upper right', frameon=False)
# vector version for print, high-resolution raster version for screens
plt.savefig('figures/reading_scores.pdf')
plt.savefig('figures/reading_scores.png', dpi=600, bbox_inches='tight')
Self-reported literacy is strongly associated with word reading scores, but there is clearly overlap between the categories (i.e., some participants reported being low-literate, but scored better than other participants who reported being fully literate), which is likely due to participants finding it difficult to estimate their own literacy relative to others. We will therefore use the measured word reading scores as our measure of literacy in the analyses reported below.
# z-score the predictors so model coefficients are on a comparable scale;
# task is dummy-coded first (word = 1, pseudoword = 0) and then standardized
task_dummy = pd.get_dummies(df_reading['task'])['word']
df_reading['task_z'] = pmu.standardize(task_dummy)
for raw_column, z_column in [('raw_ravens_mean', 'ravens_z'),
                             ('raw_span_mean', 'span_z')]:
    df_reading[z_column] = pmu.standardize(df_reading[raw_column])
display(df_reading.head().round(2))
Because word reading scores range from 0 to 100 (with hard boundaries) it would be incorrect to model them using a linear regression model. Instead we are modelling each word in the reading task as a Bernoulli trial, using a generalized linear (i.e., logistic) model. In order to make this possible, we are going to expand each participant's reading score into 100 trials.
# Each 0-100 score becomes 100 Bernoulli rows so we can fit logistic models.
df_bernoulli = pmu.expand_binomial(df_reading, 'score', 100)
display(df_bernoulli.head().round(2))
# Sampler and model settings shared by every model fit below.
defaults = dict(
    samples=5000,              # posterior draws per chain
    tune=2500,                 # warm-up draws (discarded)
    chains=4,
    init='advi+adapt_diag',
    family='bernoulli',        # logistic link
    priors={'fixed': 'narrow', 'random': 'narrow'},
)
We expect a model with task (word versus pseudoword), digit span, and ravens score, plus the interactions between digit span and task, and ravens score and task as predictors to fit best, but we will also compare models with subsets of those predictors to see if a more parsimonious model perhaps has just as good a fit.
# Fit each candidate fixed-effects logistic model on the Bernoulli-expanded
# data. The repeated Model(...)/fit(...) boilerplate is factored into one
# helper; the individual model objects keep their original module-level names
# because the model-comparison step below refers to them directly.
def _fit_fixed(formula):
    """Fit one fixed-effects model for `formula` with the shared defaults.

    Returns the fitted bambi Model (the fit() return value is discarded,
    as in the original code).
    """
    model = Model(df_bernoulli)
    model.fit(formula, **defaults)
    return model

fixed_task = _fit_fixed('score_bernoulli ~ task_z')
fixed_task_span = _fit_fixed('score_bernoulli ~ task_z + span_z')
fixed_task_ravens = _fit_fixed('score_bernoulli ~ task_z + ravens_z')
fixed_task_span_ravens = _fit_fixed('score_bernoulli ~ task_z + span_z + ravens_z')
fixed_taskxspanxravens = _fit_fixed('score_bernoulli ~ task_z*span_z + task_z*ravens_z')
from importlib import reload
# NOTE(review): reloading pymc3_utils looks like a leftover from iterating on
# the helper module during development — confirm it can be removed
reload(pmu)
# compare candidate models by leave-one-out cross-validation (LOO);
# pmu.compare returns both a comparison plot and a comparison table
g_comparison, comparison = pmu.compare({
'task': fixed_task,
'task + span': fixed_task_span,
'task + ravens': fixed_task_ravens,
'task + span + ravens': fixed_task_span_ravens,
'task * span + task * ravens': fixed_taskxspanxravens,
}, ic='LOO')
display(comparison)
plt.savefig('figures/reading_model_comparison.pdf')
plt.savefig('figures/reading_model_comparison.png', dpi=600, bbox_inches='tight')
As expected, the most complex model fits best, but interestingly, the reading_score ~ task + span + ravens model without interactions fits just as well (after correcting for model complexity) meaning that there is no meaningful interaction between task and the other two predictors. We will now refit this nicely parsimonious model with by-participant intercepts in order to be able to use these intercepts as corrected reading scores in our recognition memory models.
# NOTE(review): the text above says we refit the parsimonious
# task + span + ravens model (no interactions), but the formula below keeps
# both interaction terms (matching the variable name) — confirm which
# specification is intended
mixed_taskxspanxravens = Model(df_bernoulli)
# same fixed effects as the winning comparison model, plus a random
# (by-participant) intercept that will serve as the corrected reading score
mixed_taskxspanxravens.fit('score_bernoulli ~ task_z*span_z + task_z*ravens_z',
random=['1|pp'],
**defaults)
# posterior summary with 95% credible intervals
display(pm.summary(mixed_taskxspanxravens.backend.trace, credible_interval=.95).round(2))
NUTS warns that the number of effective samples is smaller than 10% for some parameters. This can be an indicator of several sampling issues, most likely mild autocorrelation. The sampling stats still show more than 1.5K effective samples for all parameters and $\hat{r}$ values also look good.
We will plot model traces below to look at the autocorrelation issue.
# trace plots, one row per parameter: posterior density on the left, sampled
# values per draw on the right — used to eyeball autocorrelation and mixing
g_traces = pm.traceplot(mixed_taskxspanxravens.backend.trace)
plt.savefig('figures/reading_model_traces.png', dpi=600)
The posterior traces (right column) show signs of very mild autocorrelation, but nothing pathological. (We appear to be exploring the parameter space fairly well.)
Posterior densities (left column) also look well-behaved (i.e. unimodal, roughly normally distributed).
# extract the by-participant random intercepts from the mixed model
pps = df_reading['pp'].unique()
# trace rows for the random intercepts are labelled '1|pp[0]' ... '1|pp[n-1]'
pp_nums = [f'1|pp[{i}]' for i in range(len(pps))]
df_intercepts = pm.summary(mixed_taskxspanxravens.backend.trace).loc[pp_nums]
# assumes the model's factor levels are the sorted participant ids, so that
# intercept i belongs to the i-th smallest pp — TODO confirm against the
# level ordering bambi/patsy actually uses
df_intercepts['pp'] = np.sort(pps)
display(df_intercepts.head().round(2))
# per-participant mean over the two tasks; NOTE(review): older pandas
# silently drops the non-numeric columns here, newer pandas raises — pin the
# pandas version or pass numeric_only explicitly
df_uncorrected = df_reading.groupby('pp', as_index=False).mean().rename(columns={'score': 'raw_reading_score'})
# the posterior mean of each intercept is the adjusted (corrected) score
df_intercepts = df_intercepts[['pp', 'mean']].rename(columns={'mean': 'adjusted_reading_score'})
df_intercepts = df_intercepts.merge(df_uncorrected,
left_on='pp', right_on='pp').reset_index()
display(df_intercepts.head().round(2))
# write intercepts to file
df_intercepts.to_csv('data/reading_intercepts.tsv', sep='\t')
To make sure our score correction had the desired effect, we plot a heatmap of absolute correlations between the cognitive ability measures and both the corrected and uncorrected reading scores.
# Correlations between raw/adjusted reading scores and the ability measures;
# only the lower triangle is drawn (upper half and diagonal are masked out).
score_columns = [
    'raw_reading_score',
    'raw_span_mean',
    'raw_ravens_mean',
    'adjusted_reading_score',
]
corrs = df_intercepts[score_columns].corr().round(2)
display(corrs)
# NaN-out the upper triangle (including the diagonal) so seaborn leaves
# those cells blank
upper = np.triu(np.full(corrs.shape, np.nan))
corrs = (corrs + upper).abs()
plt.figure(figsize=(5, 3.5))
g = sns.heatmap(corrs, cmap='magma', vmin=0, vmax=1, annot=True)
# human-readable axis labels in the same order as score_columns
labels = [
    'raw reading score',
    'digit span',
    'raven\'s score',
    'adjusted reading score',
]
g.set_xticklabels(labels, rotation=30, ha='right')
g.set_yticklabels(labels, rotation=0)
# crop the all-NaN first row and last column out of view
g.set(ylim=(len(corrs), 1), xlim=(0, len(corrs) - 1))
g.set_title('absolute correlations between predictors', pad=12)
plt.savefig('figures/postcorrection_heatmap.pdf')
plt.savefig('figures/postcorrection_heatmap.png', dpi=600, bbox_inches='tight')
Correlation between unadjusted reading and both cognitive ability measures was .51, after correction the correlation has been reduced to less than .05, while unadjusted and adjusted reading score still share around 50% of their variance (r = .71).
An anonymous reviewer asked whether correcting reading score only for Raven's Progressive Matrices score (and not for digit span) would result in a different coefficient estimate for adjusted reading score in the recognition memory model. To answer the reviewer's question, we repeat the adjustment procedure with Raven's score, but without digit span score.
# reviewer check: repeat the adjustment correcting for Raven's score only
# (digit span left out), again with by-participant random intercepts
mixed_task_ravens = Model(df_bernoulli)
mixed_task_ravens.fit('score_bernoulli ~ task_z*ravens_z',
random=['1|pp'],
**defaults)
# extract the per-participant intercepts from the trace summary
pps = df_reading['pp'].unique()
pp_nums = [f'1|pp[{i}]' for i in range(len(pps))]
df_intercepts = pm.summary(mixed_task_ravens.backend.trace).loc[pp_nums]
# assumes intercept i belongs to the i-th smallest participant id — TODO
# confirm against the model's factor-level ordering
df_intercepts['pp'] = np.sort(pps)
display(df_intercepts.head().round(2))
# per-participant raw mean; non-numeric columns are dropped by .mean() in
# the pandas version this notebook was written against — verify on upgrade
df_uncorrected = df_reading.groupby('pp', as_index=False).mean().rename(columns={'score': 'raw_reading_score'})
df_intercepts = df_intercepts[['pp', 'mean']].rename(columns={'mean': 'adjusted_reading_score'})
df_intercepts = df_intercepts.merge(df_uncorrected,
left_on='pp', right_on='pp').reset_index()
display(df_intercepts.head().round(2))
# write intercepts to file
df_intercepts.to_csv('data/reading_intercepts_adjusted_for_only_ravens.tsv', sep='\t')